﻿import string
from matplotlib import pyplot as plt
# Make SimHei the sans-serif font for all figures; presumably chosen so the
# Chinese axis labels used by the plot further down render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
 
# Word -> occurrence count; filled in by the counting loop further down.
hist = {}

# Read the whole text into memory. The `with` block guarantees the file is
# closed even if read() raises.
# NOTE(review): no encoding is passed, so the platform default is used -
# confirm it matches the file's actual encoding.
with open('《教父》小说英文版.txt', 'r') as f:
    content = f.read()

# Turn hyphens into spaces so the parts of a hyphenated compound are counted
# as separate words.
content = content.replace('-', ' ')

# Split on any run of whitespace.
words = content.split()

# Normalise every token: strip leading/trailing ASCII punctuation (characters
# inside a token, such as an inner apostrophe, are kept) and lower-case it so
# differently-cased spellings are counted together.
words = [token.strip(string.punctuation).lower() for token in words]

# Tally occurrences. A token that consisted only of punctuation is empty
# after stripping; skip it so '' is not counted as a word.
for word in words:
    if not word:
        continue
    hist[word] = hist.get(word, 0) + 1

# Build [count, word] pairs from the histogram and order them from largest to
# smallest. Lists compare element by element, so pairs with equal counts are
# ordered by word, also descending.
data = sorted(([count, word] for word, count in hist.items()), reverse=True)

# Report how many distinct entries were counted.
print(len(data))

# Plot the most frequent words as a bar chart.
# Slicing (instead of indexing 0..9) avoids an IndexError when fewer than ten
# distinct words were counted.
for count, word in data[:10]:
    # One bar() call per word, as before. The label gives plt.legend() an
    # entry for the bar; without any labelled artists legend() only emits a
    # warning and draws nothing.
    plt.bar(word, count, label=word)

plt.legend()
plt.xlabel("单词")  # x axis: the word
plt.ylabel("词频")  # y axis: its frequency
plt.show()
	
	
	
	
	
	
	
	
	
	
	
	
	
	




